import pyspark
from pyspark import SparkConf
from pyspark.sql import SparkSession

# Build (or reuse) the local SparkSession used throughout the case study.
builder = SparkSession.builder
builder = builder.appName("Dadosfera case study")
builder = builder.config("spark.some.config.option", "some-value")
spark = builder.getOrCreate()

# Dump the effective Spark configuration for inspection.
spark.sparkContext.getConf().getAll()
[('spark.some.config.option', 'some-value'),
('spark.app.id', 'local-1645194382937'),
('spark.executor.id', 'driver'),
('spark.app.name', 'Dadosfera case study'),
('spark.driver.port', '56884'),
('spark.rdd.compress', 'True'),
('spark.serializer.objectStreamReset', '100'),
('spark.app.startTime', '1645194380930'),
('spark.master', 'local[*]'),
('spark.submit.pyFiles', ''),
('spark.submit.deployMode', 'client'),
('spark.driver.host', 'DESKTOP-MGDJI74'),
('spark.sql.warehouse.dir', 'file:/C:/Users/Usuario/spark-warehouse'),
('spark.ui.showConsoleProgress', 'true')]
spark
SparkSession - in-memory
folder = "C:\\Users\\Usuario\\Downloads\\data_trips"
import glob
# Collect every file in the data folder.
# glob.glob already returns a list, so the original append-in-a-loop
# (for file in glob.glob(...): txtfiles.append(file)) is unnecessary.
txtfiles = glob.glob(folder + "\\*")
txtfiles
['C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2009-json_corrigido.json', 'C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2010-json_corrigido.json', 'C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2011-json_corrigido.json', 'C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2012-json_corrigido.json']
# Load every yearly JSON file and stack them into a single DataFrame.
# The original used a mutable 'begin' flag to special-case the first file;
# folding with reduce expresses the same union chain without bookkeeping.
# NOTE(review): union matches columns by position — the four yearly files
# appear to share one schema (see the combined view queries below); confirm.
from functools import reduce

frames = [spark.read.json(path) for path in txtfiles]
df = reduce(lambda acc, nxt: acc.union(nxt), frames)

# Expose the combined data to Spark SQL and pull the question-1 slice to pandas.
df.createOrReplaceTempView("trips")
first_answer = spark.sql("SELECT trip_distance, passenger_count FROM trips WHERE passenger_count <= 2").toPandas()
first_answer
| trip_distance | passenger_count | |
|---|---|---|
| 0 | 0.80 | 2 |
| 1 | 5.40 | 1 |
| 2 | 4.13 | 1 |
| 3 | 2.03 | 1 |
| 4 | 1.15 | 1 |
| ... | ... | ... |
| 3319647 | 1.40 | 1 |
| 3319648 | 1.60 | 2 |
| 3319649 | 1.80 | 1 |
| 3319650 | 1.70 | 2 |
| 3319651 | 2.00 | 1 |
3319652 rows × 2 columns
import plotly.express as px
# fig = px.box(first_answer, y="trip_distance")
# fig.show()
import holoviews as hv
# hv.Points requires exactly two key dimensions (x and y). Passing only
# 'trip_distance' raised "ValueError: kdims: list length must be between 2
# and 2 (inclusive)" — supply both columns of the DataFrame.
points = hv.Points(first_answer, ['trip_distance', 'passenger_count'])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_8536/1089732015.py in <module> ----> 1 points = hv.Points(first_answer, 'trip_distance') ~\anaconda3\lib\site-packages\holoviews\element\selection.py in __init__(self, *args, **kwargs) 21 22 def __init__(self, *args, **kwargs): ---> 23 super(SelectionIndexExpr, self).__init__(*args, **kwargs) 24 self._index_skip = False 25 ~\anaconda3\lib\site-packages\holoviews\core\data\__init__.py in __init__(self, data, kdims, vdims, **kwargs) 342 datatype=kwargs.get('datatype')) 343 (data, self.interface, dims, extra_kws) = initialized --> 344 super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws))) 345 self.interface.validate(self, validate_vdims) 346 ~\anaconda3\lib\site-packages\holoviews\core\dimension.py in __init__(self, data, kdims, vdims, **params) 852 params['cdims'] = {d if isinstance(d, Dimension) else Dimension(d): val 853 for d, val in params['cdims'].items()} --> 854 super(Dimensioned, self).__init__(data, **params) 855 self.ndims = len(self.kdims) 856 cdims = [(d.name, val) for d, val in self.cdims.items()] ~\anaconda3\lib\site-packages\holoviews\core\dimension.py in __init__(self, data, id, plot_id, **params) 511 params['group'] = long_name 512 --> 513 super(LabelledData, self).__init__(**params) 514 if not util.group_sanitizer.allowable(self.group): 515 raise ValueError("Supplied group %r contains invalid characters." 
% ~\anaconda3\lib\site-packages\param\parameterized.py in __init__(self, **params) 3144 3145 self.param._generate_name() -> 3146 self.param._setup_params(**params) 3147 object_count += 1 3148 ~\anaconda3\lib\site-packages\param\parameterized.py in override_initialization(self_, *args, **kw) 1361 original_initialized = parameterized_instance.initialized 1362 parameterized_instance.initialized = False -> 1363 fn(parameterized_instance, *args, **kw) 1364 parameterized_instance.initialized = original_initialized 1365 return override_initialization ~\anaconda3\lib\site-packages\param\parameterized.py in _setup_params(self_, **params) 1614 self.param.warning("Setting non-parameter attribute %s=%s using a mechanism intended only for parameters", name, val) 1615 # i.e. if not desc it's setting an attribute in __dict__, not a Parameter -> 1616 setattr(self, name, val) 1617 1618 # PARAM2_DEPRECATION: Backwards compatibilitity for param<1.12 ~\anaconda3\lib\site-packages\param\parameterized.py in _f(self, obj, val) 351 instance_param.__set__(obj, val) 352 return --> 353 return f(self, obj, val) 354 return _f 355 ~\anaconda3\lib\site-packages\param\parameterized.py in __set__(self, obj, val) 1175 val = self.set_hook(obj,val) 1176 -> 1177 self._validate(val) 1178 1179 _old = NotImplemented ~\anaconda3\lib\site-packages\param\__init__.py in _validate(self, val) 1396 """ 1397 self._validate_value(val, self.allow_None) -> 1398 self._validate_bounds(val, self.bounds) 1399 self._validate_item_type(val, self.item_type) 1400 ~\anaconda3\lib\site-packages\param\__init__.py in _validate_bounds(self, val, bounds) 1407 if min_length is not None and max_length is not None: 1408 if not (min_length <= l <= max_length): -> 1409 raise ValueError("%s: list length must be between %s and %s (inclusive)"%(self.name,min_length,max_length)) 1410 elif min_length is not None: 1411 if not min_length <= l: ValueError: kdims: list length must be between 2 and 2 (inclusive)
import pandas as pd
import holoviews as hv
from holoviews.operation.timeseries import rolling, rolling_outlier_std
# Activate the bokeh plotting backend for holoviews.
hv.extension('bokeh')
# Display the (trip_distance, passenger_count) DataFrame built earlier.
first_answer
| trip_distance | passenger_count | |
|---|---|---|
| 0 | 0.80 | 2 |
| 1 | 5.40 | 1 |
| 2 | 4.13 | 1 |
| 3 | 2.03 | 1 |
| 4 | 1.15 | 1 |
| ... | ... | ... |
| 3319647 | 1.40 | 1 |
| 3319648 | 1.60 | 2 |
| 3319649 | 1.80 | 1 |
| 3319650 | 1.70 | 2 |
| 3319651 | 2.00 | 1 |
3319652 rows × 2 columns
# Mean trip distance over the filtered (passenger_count <= 2) trips.
first_answer['trip_distance'].mean()
2.6625269962030926
import seaborn as sns
sns.set_theme(style="whitegrid")
# Boxplot of trip distances to eyeball the outlier spread.
ax = sns.boxplot(y=first_answer["trip_distance"])
# Summary statistics (count/mean/std/quantiles) for both columns.
first_answer.describe()
| trip_distance | passenger_count | |
|---|---|---|
| count | 3.319652e+06 | 3.319652e+06 |
| mean | 2.662527e+00 | 1.219706e+00 |
| std | 3.112605e+00 | 4.143705e-01 |
| min | 0.000000e+00 | 0.000000e+00 |
| 25% | 1.000000e+00 | 1.000000e+00 |
| 50% | 1.680000e+00 | 1.000000e+00 |
| 75% | 3.000000e+00 | 1.000000e+00 |
| max | 4.970000e+01 | 2.000000e+00 |
# Distribution of trip distances with a KDE overlay.
sns.histplot(data=first_answer, x="trip_distance", kde=True)
<AxesSubplot:xlabel='trip_distance', ylabel='Count'>
# Same distribution, split by passenger count (0, 1 or 2).
sns.histplot(data=first_answer, x="trip_distance", kde=True, hue="passenger_count")
<AxesSubplot:xlabel='trip_distance', ylabel='Count'>
# Inspect the suspicious rows reporting zero passengers (444 rows).
first_answer[first_answer['passenger_count']==0]
| trip_distance | passenger_count | |
|---|---|---|
| 6257 | 0.0 | 0 |
| 16334 | 0.6 | 0 |
| 18399 | 0.6 | 0 |
| 27688 | 0.0 | 0 |
| 51794 | 0.6 | 0 |
| ... | ... | ... |
| 3284510 | 6.0 | 0 |
| 3290893 | 1.5 | 0 |
| 3294944 | 0.6 | 0 |
| 3297524 | 1.5 | 0 |
| 3299504 | 2.1 | 0 |
444 rows × 2 columns
# Distance distribution restricted to the zero-passenger trips.
sns.histplot(data=first_answer[first_answer['passenger_count']==0], x="trip_distance", kde=True)
<AxesSubplot:xlabel='trip_distance', ylabel='Count'>
# Gross revenue (sum of total_amount) and trip count per vendor, largest first.
sqlDF = spark.sql("SELECT vendor_id, sum(total_amount) as total, count(total_amount) as count FROM trips group by vendor_id order by total desc")
sqlDF.show()
+---------+--------------------+-------+ |vendor_id| total| count| +---------+--------------------+-------+ | CMT| 1.954908428000008E7|1916128| | VTS|1.9043433999999914E7|1833052| | DDS| 2714901.7200000025| 250816| | TS| 45.6| 4| +---------+--------------------+-------+
# Drill into the anomalous 'TS' vendor (only 4 trips in the whole dataset).
sqlDF = spark.sql("SELECT * FROM trips WHERE vendor_id = 'TS'")
sqlDF.show()
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+ | dropoff_datetime|dropoff_latitude|dropoff_longitude|fare_amount|passenger_count|payment_type| pickup_datetime|pickup_latitude|pickup_longitude|rate_code|store_and_fwd_flag|surcharge|tip_amount|tolls_amount|total_amount|trip_distance|vendor_id| +--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+ |2009-08-22T14:45:...| 40.787885| -73.954648| 10.9| 2| CASH|2009-08-22T14:33:...| 40.74291| -73.98244| null| null| 0.5| 0.0| 0.0| 11.4| 3.8| TS| |2010-01-28T19:10:...| 40.787885| -73.954648| 10.9| 2| CASH|2010-01-28T19:00:...| 40.74291| -73.98244| null| null| 0.5| 0.0| 0.0| 11.4| 3.8| TS| |2011-10-18T20:27:...| 40.787885| -73.954648| 10.9| 2| CASH|2011-10-18T20:19:...| 40.74291| -73.98244| null| null| 0.5| 0.0| 0.0| 11.4| 3.8| TS| |2012-07-11T10:11:...| 40.787885| -73.954648| 10.9| 2| CASH|2012-07-11T10:04:...| 40.74291| -73.98244| null| null| 0.5| 0.0| 0.0| 11.4| 3.8| TS| +--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+
# Same 'TS' rows pulled into pandas for a closer look.
test = spark.sql("SELECT * FROM trips WHERE vendor_id = 'TS'").toPandas()
test
| dropoff_datetime | dropoff_latitude | dropoff_longitude | fare_amount | passenger_count | payment_type | pickup_datetime | pickup_latitude | pickup_longitude | rate_code | store_and_fwd_flag | surcharge | tip_amount | tolls_amount | total_amount | trip_distance | vendor_id | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2009-08-22T14:45:03.951506+00:00 | 40.787885 | -73.954648 | 10.9 | 2 | CASH | 2009-08-22T14:33:16.751404+00:00 | 40.74291 | -73.98244 | None | NaN | 0.5 | 0.0 | 0.0 | 11.4 | 3.8 | TS |
| 1 | 2010-01-28T19:10:06.358666+00:00 | 40.787885 | -73.954648 | 10.9 | 2 | CASH | 2010-01-28T19:00:36.795723+00:00 | 40.74291 | -73.98244 | None | NaN | 0.5 | 0.0 | 0.0 | 11.4 | 3.8 | TS |
| 2 | 2011-10-18T20:27:33.827441+00:00 | 40.787885 | -73.954648 | 10.9 | 2 | CASH | 2011-10-18T20:19:22.258244+00:00 | 40.74291 | -73.98244 | None | NaN | 0.5 | 0.0 | 0.0 | 11.4 | 3.8 | TS |
| 3 | 2012-07-11T10:11:00.242403+00:00 | 40.787885 | -73.954648 | 10.9 | 2 | CASH | 2012-07-11T10:04:38.066432+00:00 | 40.74291 | -73.98244 | None | NaN | 0.5 | 0.0 | 0.0 | 11.4 | 3.8 | TS |
# Revenue per vendor including tips and surcharge, net of tolls.
# NOTE(review): in NYC taxi data total_amount usually already includes tip,
# surcharge and tolls — adding them again may double-count; confirm the
# schema of these JSON files before relying on this figure.
sqlDF = spark.sql("SELECT vendor_id, sum(total_amount + tip_amount + surcharge - tolls_amount) as total, count(total_amount) as count FROM trips group by vendor_id order by total desc")
sqlDF.show()
+---------+--------------------+-------+ |vendor_id| total| count| +---------+--------------------+-------+ | VTS|2.0189957039999817E7|1833052| | CMT|2.0051976479999762E7|1916128| | DDS| 2853020.600000002| 250816| | TS| 47.6| 4| +---------+--------------------+-------+
# Enumerate payment types; note the inconsistent casing in the data
# ('CASH'/'Cash', 'Credit'/'CREDIT') that later filters must account for.
sqlDF = spark.sql("SELECT distinct(payment_type) FROM trips")
sqlDF.show()
+------------+ |payment_type| +------------+ | No Charge| | CASH| | Credit| | Cash| | Dispute| | CREDIT| +------------+
# Monthly revenue and trip counts across all payment types.
sqlDF = spark.sql("SELECT sum(total_amount), count(total_amount), MONTH(pickup_datetime) as month, YEAR(pickup_datetime) as year FROM trips group by year, month order by year, month")
sqlDF.show()
+-----------------+-------------------+-----+----+ |sum(total_amount)|count(total_amount)|month|year| +-----------------+-------------------+-----+----+ |848481.0900000015| 82415| 1|2009| |849284.3299999984| 82192| 2|2009| |945950.0300000007| 91353| 3|2009| |904153.0399999974| 87787| 4|2009| |943034.3999999976| 91211| 5|2009| |907902.9799999993| 88094| 6|2009| |942157.6100000003| 91234| 7|2009| |945951.6600000031| 91331| 8|2009| |910582.5799999982| 88331| 9|2009| |942709.1199999984| 91293| 10|2009| |912818.9599999993| 88288| 11|2009| |273840.6000000002| 26471| 12|2009| |841325.4200000009| 81673| 1|2010| |850524.2200000021| 82350| 2|2010| |936354.5600000024| 90511| 3|2010| |908271.2899999989| 87657| 4|2010| | 935763.11| 90728| 5|2010| |910471.2899999971| 87731| 6|2010| |933746.3699999999| 90602| 7|2010| |932630.0399999997| 90387| 8|2010| +-----------------+-------------------+-----+----+ only showing top 20 rows
# Monthly revenue paid in cash; the IN list covers the two cash spellings
# that actually occur in the data ('Cash' and 'CASH').
test = spark.sql("SELECT sum(total_amount), count(total_amount), MONTH(pickup_datetime) as month, YEAR(pickup_datetime) as year FROM trips WHERE payment_type in ('Cash', 'CASH') group by year, month order by year, month").toPandas()
test
| sum(total_amount) | count(total_amount) | month | year | |
|---|---|---|---|---|
| 0 | 627267.13 | 66824 | 1 | 2009 |
| 1 | 625186.60 | 66680 | 2 | 2009 |
| 2 | 696467.42 | 73948 | 3 | 2009 |
| 3 | 667394.58 | 71142 | 4 | 2009 |
| 4 | 698388.04 | 74151 | 5 | 2009 |
| 5 | 669432.45 | 71342 | 6 | 2009 |
| 6 | 697615.20 | 74076 | 7 | 2009 |
| 7 | 697139.14 | 74021 | 8 | 2009 |
| 8 | 671736.14 | 71511 | 9 | 2009 |
| 9 | 695397.38 | 74011 | 10 | 2009 |
| 10 | 670152.72 | 71424 | 11 | 2009 |
| 11 | 204377.30 | 21583 | 12 | 2009 |
| 12 | 622337.46 | 66179 | 1 | 2010 |
| 13 | 624890.13 | 66837 | 2 | 2010 |
| 14 | 695007.47 | 73616 | 3 | 2010 |
| 15 | 671593.76 | 71075 | 4 | 2010 |
| 16 | 690537.78 | 73663 | 5 | 2010 |
| 17 | 670187.13 | 70987 | 6 | 2010 |
| 18 | 690775.09 | 73487 | 7 | 2010 |
| 19 | 688273.39 | 73358 | 8 | 2010 |
| 20 | 665003.80 | 71113 | 9 | 2010 |
| 21 | 692699.55 | 73648 | 10 | 2010 |
| 22 | 666169.22 | 70876 | 11 | 2010 |
| 23 | 243079.32 | 25874 | 12 | 2010 |
| 24 | 520871.44 | 55570 | 1 | 2011 |
| 25 | 667657.01 | 70990 | 2 | 2011 |
| 26 | 734770.64 | 78285 | 3 | 2011 |
| 27 | 714801.75 | 76122 | 4 | 2011 |
| 28 | 736945.01 | 78591 | 5 | 2011 |
| 29 | 711644.71 | 75734 | 6 | 2011 |
| 30 | 740846.28 | 78713 | 7 | 2011 |
| 31 | 745906.48 | 78855 | 8 | 2011 |
| 32 | 715333.65 | 76246 | 9 | 2011 |
| 33 | 740357.19 | 78580 | 10 | 2011 |
| 34 | 591419.94 | 63027 | 11 | 2011 |
| 35 | 759740.19 | 80926 | 1 | 2012 |
| 36 | 735060.27 | 78451 | 2 | 2012 |
| 37 | 783597.32 | 83368 | 3 | 2012 |
| 38 | 764028.96 | 81298 | 4 | 2012 |
| 39 | 789521.00 | 83935 | 5 | 2012 |
| 40 | 760125.42 | 80814 | 6 | 2012 |
| 41 | 791345.04 | 84421 | 7 | 2012 |
| 42 | 785371.96 | 83431 | 8 | 2012 |
| 43 | 766579.46 | 81286 | 9 | 2012 |
| 44 | 685184.48 | 72783 | 10 | 2012 |
# Cash revenue per month, one bar colour per year.
ax = sns.barplot(x="month", y="sum(total_amount)", hue="year", data=test)
# Sample trips from the first days of Feb 2011 (lexicographic BETWEEN works
# here because pickup_datetime is an ISO-8601 string).
sqlDF = spark.sql("SELECT * FROM trips WHERE pickup_datetime between '2011-02-01' and '2011-02-05' ")
sqlDF.show()
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+ | dropoff_datetime|dropoff_latitude|dropoff_longitude|fare_amount|passenger_count|payment_type| pickup_datetime|pickup_latitude|pickup_longitude|rate_code|store_and_fwd_flag|surcharge|tip_amount|tolls_amount|total_amount|trip_distance|vendor_id| +--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+ |2011-02-04T13:45:...| 40.738353| -74.002237| 12.1| 1| Credit|2011-02-04T13:36:...| 40.763628| -73.956142| null| null| 1.0| 3.0| 0.0| 16.1| 4.1| VTS| |2011-02-03T00:25:...| 40.74865| -73.978213| 4.9| 1| CASH|2011-02-03T00:18:...| 40.757912| -73.987015| null| null| 0.0| 0.0| 0.0| 4.9| 0.85| VTS| |2011-02-04T17:10:...| 40.76764| -73.955103| 6.9| 2| CASH|2011-02-04T17:02:...| 40.748987| -73.974662| null| null| 0.0| 0.0| 0.0| 6.9| 1.84| VTS| |2011-02-01T03:03:...| 40.81954| -73.953983| 22.9| 5| CASH|2011-02-01T02:49:...| 40.721385| -74.004608| null| null| 1.0| 0.0| 0.0| 23.9| 8.63| VTS| |2011-02-01T04:38:...| 40.721187| -74.010043| 8.9| 1| CASH|2011-02-01T04:31:...| 40.73965| -73.984692| null| null| 0.0| 0.0| 0.0| 8.9| 2.72| VTS| |2011-02-01T21:30:...| 40.752753| -73.975333| 5.7| 5| CASH|2011-02-01T21:17:...| 40.763102| -73.967735| null| null| 1.0| 0.0| 0.0| 6.7| 0.89| VTS| |2011-02-04T07:46:...| 40.727866| -73.98225| 12.5| 2| CASH|2011-02-04T07:34:...| 40.764334| -73.96896| null| null| 0.5| 0.0| 0.0| 13.0| 3.4| DDS| |2011-02-04T06:40:...| 40.741143| -73.998067| 5.0| 1| Cash|2011-02-04T06:31:...| 40.739177| -74.006429| null| null| 0.0| 0.0| 0.0| 5.0| 0.8| CMT| |2011-02-04T02:07:...| 40.760225| -73.973082| 6.6| 1| 
Cash|2011-02-04T01:59:...| 40.772713| -73.977549| null| null| 0.0| 0.0| 0.0| 6.6| 1.4| CMT| |2011-02-03T09:43:...| 40.738065| -73.983586| 4.9| 1| Cash|2011-02-03T09:35:...| 40.72777| -73.983387| null| null| 0.0| 0.0| 0.0| 4.9| 0.9| CMT| |2011-02-04T11:20:...| 40.75304| -73.987491| 6.9| 3| Cash|2011-02-04T11:12:...| 40.738115| -74.003779| null| null| 0.0| 0.0| 0.0| 6.9| 1.5| CMT| |2011-02-02T02:51:...| 40.720601| -73.985222| 7.0| 1| Cash|2011-02-02T02:42:...| 40.725551| -74.004357| null| null| 0.0| 0.0| 0.0| 7.0| 1.3| CMT| |2011-02-03T02:46:...| 40.753529| -73.977958| 5.3| 1| Cash|2011-02-03T02:42:...| 40.759811| -73.984697| null| null| 0.0| 0.0| 0.0| 5.3| 0.7| CMT| |2011-02-03T17:35:...| 40.851285| -73.940091| 18.2| 1| Cash|2011-02-03T17:28:...| 40.767269| -73.983266| null| null| 0.0| 0.0| 0.0| 18.2| 7.3| CMT| |2011-02-03T08:42:...| 40.791748| -73.968587| 8.1| 2| CASH|2011-02-03T08:34:...| 40.7789| -73.962533| null| null| 0.0| 0.0| 0.0| 8.1| 1.68| VTS| |2011-02-03T00:33:...| 40.780733| -73.94612| 10.9| 1| CASH|2011-02-03T00:25:...| 40.751407| -73.982158| null| null| 1.0| 0.0| 0.0| 11.9| 3.66| VTS| |2011-02-01T07:26:...| 40.738602| -74.002527| 5.3| 1| Credit|2011-02-01T07:12:...| 40.721728| -74.008325| null| null| 0.5| 1.5| 0.0| 7.3| 1.3| VTS| |2011-02-03T14:23:...| 40.763353| -73.97936| 5.7| 1| CASH|2011-02-03T14:12:...| 40.75584| -73.97532| null| null| 1.0| 0.0| 0.0| 6.7| 0.79| VTS| |2011-02-04T20:08:...| 40.739775| -73.998777| 8.9| 1| CASH|2011-02-04T19:56:...| 40.759947| -73.975647| null| null| 1.0| 0.0| 0.0| 9.9| 2.16| VTS| |2011-02-03T23:51:...| 40.780722| -73.957058| 8.5| 5| CASH|2011-02-03T23:42:...| 40.758972| -73.972563| null| null| 0.5| 0.0| 0.0| 9.0| 2.42| VTS| +--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+ only showing top 20 rows
# Daily tip totals from August 2012 onward, labelled 'dd/MM'.
# The original hand-rolled the zero-padded label with nested concat/case
# expressions; Spark SQL's date_format produces the same 'dd/MM' string in
# one call. 'dia' is added to GROUP BY so the projection is valid under
# Spark's grouping rules (it is constant within each (meses, dias) group).
test = spark.sql("SELECT date_format(pickup_datetime, 'dd/MM') as dia, DAY(pickup_datetime) as dias, MONTH(pickup_datetime) as meses, sum(tip_amount) as gorjeta FROM trips WHERE YEAR(pickup_datetime) = 2012 and MONTH(pickup_datetime) > 7 group by dia, dias, meses order by meses, dias").toPandas()
test
| dia | dias | meses | gorjeta | |
|---|---|---|---|---|
| 0 | 01/08 | 1 | 8 | 1268.04 |
| 1 | 02/08 | 2 | 8 | 1473.60 |
| 2 | 03/08 | 3 | 8 | 1315.18 |
| 3 | 04/08 | 4 | 8 | 1314.96 |
| 4 | 05/08 | 5 | 8 | 1297.76 |
| ... | ... | ... | ... | ... |
| 83 | 23/10 | 23 | 10 | 1420.38 |
| 84 | 24/10 | 24 | 10 | 1236.91 |
| 85 | 25/10 | 25 | 10 | 1253.87 |
| 86 | 26/10 | 26 | 10 | 1397.91 |
| 87 | 27/10 | 27 | 10 | 1217.40 |
88 rows × 4 columns
# Tip revenue per day; at default figure size the 88 'dia' ticks overlap.
sns.lineplot(x="dia", y="gorjeta",data=test)
<AxesSubplot:xlabel='dia', ylabel='gorjeta'>
from matplotlib import pyplot as plt
import seaborn as sns
# Enlarge the figure and fonts so the 88 daily x-ticks stay legible.
plt.figure(figsize=(60,50))
plt.xlabel('xlabel', fontsize=50)
plt.ylabel('ylabel', fontsize=50)
plt.xticks(fontsize=25, rotation=45)
plt.yticks(fontsize=40)
ax = sns.lineplot(x="dia", y="gorjeta",data=test)
# ax.tick_params(axis='both', which='major', labelsize=30)
test
| dia | dias | meses | gorjeta | |
|---|---|---|---|---|
| 0 | 01/08 | 1 | 8 | 1268.04 |
| 1 | 02/08 | 2 | 8 | 1473.60 |
| 2 | 03/08 | 3 | 8 | 1315.18 |
| 3 | 04/08 | 4 | 8 | 1314.96 |
| 4 | 05/08 | 5 | 8 | 1297.76 |
| ... | ... | ... | ... | ... |
| 83 | 23/10 | 23 | 10 | 1420.38 |
| 84 | 24/10 | 24 | 10 | 1236.91 |
| 85 | 25/10 | 25 | 10 | 1253.87 |
| 86 | 26/10 | 26 | 10 | 1397.91 |
| 87 | 27/10 | 27 | 10 | 1217.40 |
88 rows × 4 columns
# One line per month (hue='meses'), day-of-month on the x axis.
ax = sns.lineplot(x="dias", y="gorjeta",data=test, hue='meses')
# Presentation screen size configuration (RISE / livereveal).
from notebook.services.config import ConfigManager
cm = ConfigManager()
# Set the slide dimensions and enable scrolling.
cm.update('livereveal', {
'width': 1000,
'height': 600,
'scroll': True,
})
{'width': 1000, 'height': 600, 'scroll': True}
# Export the notebook to reveal.js slides. This fails below with
# 'TemplateNotFound: slides_reveal.tpl': nbconvert >= 6 reorganised its
# template system and the legacy .tpl files that output_toggle.tpl extends
# are no longer shipped — the custom template needs porting (or nbconvert < 6).
!jupyter nbconvert teste.ipynb --to slides --post serve --template output_toggle.tpl
[NbConvertApp] Converting notebook teste.ipynb to slides
Traceback (most recent call last):
File "C:\Users\Usuario\anaconda3\Scripts\jupyter-nbconvert-script.py", line 10, in <module>
sys.exit(main())
File "C:\Users\Usuario\anaconda3\lib\site-packages\jupyter_core\application.py", line 264, in launch_instance
return super(JupyterApp, cls).launch_instance(argv=argv, **kwargs)
File "C:\Users\Usuario\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
app.start()
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 346, in start
self.convert_notebooks()
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 518, in convert_notebooks
self.convert_single_notebook(notebook_filename)
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 483, in convert_single_notebook
output, resources = self.export_single_notebook(notebook_filename, resources, input_buffer=input_buffer)
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 412, in export_single_notebook
output, resources = self.exporter.from_filename(notebook_filename, resources=resources)
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\exporter.py", line 181, in from_filename
return self.from_file(f, resources=resources, **kw)
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\exporter.py", line 199, in from_file
return self.from_notebook_node(nbformat.read(file_stream, as_version=4), resources=resources, **kw)
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\html.py", line 129, in from_notebook_node
return super().from_notebook_node(nb, resources, **kw)
File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\templateexporter.py", line 390, in from_notebook_node
output = self.template.render(nb=nb_copy, resources=resources)
File "C:\Users\Usuario\anaconda3\lib\site-packages\jinja2\environment.py", line 1090, in render
self.environment.handle_exception()
File "C:\Users\Usuario\anaconda3\lib\site-packages\jinja2\environment.py", line 832, in handle_exception
reraise(*rewrite_traceback_stack(source=source))
File "C:\Users\Usuario\anaconda3\lib\site-packages\jinja2\_compat.py", line 28, in reraise
raise value.with_traceback(tb)
File "C:\Users\Usuario\output_toggle.tpl", line 5, in top-level template code
{%- extends 'slides_reveal.tpl' -%}
jinja2.exceptions.TemplateNotFound: slides_reveal.tpl